# This file assesses the relationship between perceptions of local communities
# and outcomes conditional on matched pair.  This is based on matches where 31
# cases only were dropped and perceptions are allowed to continue past 1

library(dplyr)
library(ggplot2)
library(lme4)
library(lmerTest)
library(fixest)
library(here)
library(xtable)
library(sp)
library(robustbase)
library(estimatr)
library(brms)

load(here("Design", "matches_anyDA_new.rda"), verbose = TRUE)
load(here("Data", "wrkdatOwnMap_anyDA_new.rda"), verbose = TRUE)
source(here("Design", "nonbimatchingfunctions.R"))

wdat0 <- inner_join(wrkdatOwnMap_new, matches_anyDA_new)

## Add information about which unit is ranked higher versus lower within pair
wdat0 <- wdat0 %>%
  group_by(pair) %>%
  mutate(
    perc_more = rank(vm.community.subj) - 1,
    social_capital01_rank = rank(social.capital) - 1,
    community_resp01_rank = rank(community.resp01) - 1
  ) %>%
  ungroup()

table(wdat0$perc_more, exclude = c())
table(wdat0$social_capital01_rank, exclude = c())
table(wdat0$community_resp01_rank, exclude = c())

summary(wdat0[, c("perc_more", "social_capital01_rank", "community_resp01_rank")])

## vm.community.subj is perception of own drawn map. This next creates a
## group-level variable that is the average perception of the group. If there
## is a correlation between the within-pair difference in perceptions (i..e the
## individual level effect) and the across pair differences (the group level
## effect), this reduces any influence of this correltion on the results. (Bafumi and Gelman)

## Basically, the idea is that maybe large differences in social cohesion
## within pair are correlated with high overall perceptions. But, of course
## large differences in perceptions between only two people would increase the
## average perception (i.e. an average of 0 and .1 is smaller than an average
## of 0 and .8). So, in fact, I think we should not include this in the model
## --- it is just another version of the individual effect. To the extent that
## is not perfectly correlated, it is because of a lot of zeros and/or
## non-linearities: basically there are people who say that they perceive say,
## .1, who are paired with people who perceive .2,.3,.4,0, etc.. This means
## that the two variables are not perfectly correlated. (See the pairs plot below).

## So this means that, because we have pairs, we basically get around the
## problems articulated by Bafumi and Gelman.

## We want to regress the outcome on perceptions (vm.community.subj) while:

## (1) focusing the estimation on differences *within pair*

## (2) removing any extra small linear relationships remaining between
## perceptions and objective within pair  and between outcome and objective
## context within pair

## We need to exclude people with the same value for perceptions otherwise the Bayesian approach fails.
## There are few such pairs (126/2 = 63)
table(wdat0$perc_more)

## Models with crossed random effects
lmer1_social_cohesion <- lmer(social.capital01 ~ vm.community.norm2 + da_prop_vm_20pct_06 + (1 | pair) + (1 | dauid),
  data = wdat0
)

summary(lmer1_social_cohesion, corr = FALSE)

## Checking the likelihood approach using Bayes with flat and student-t priors.
## This next is slow. for threading needs cmdstanr which requires a separate
## installation https://mc-stan.org/cmdstanr/articles/cmdstanr.html here we are
## using 4 chains with 2 threads per chain (so 8 cores)

blmer1_social_cohesion <- brm(social.capital01 ~ vm.community.norm2 + da_prop_vm_20pct_06 + (1 | pair) + (1 | dauid),
  data = wdat0,
  chains = 4, iter = 2000, cores = 4,
  #   backend = "cmdstanr", threads = threading(2),
  control = list(adapt_delta = 0.99, max_treedepth = 20)
)

summary(blmer1_social_cohesion)
default_prior(blmer1_social_cohesion)

#> default_prior(blmer1_social_cohesion)
#                  prior     class                coef group resp dpar nlpar lb ub       source
#                 (flat)         b                                                      default
#                 (flat)         b da_prop_vm_20pct_06                             (vectorized)
#                 (flat)         b  vm.community.norm2                             (vectorized)
# student_t(3, 0.8, 2.5) Intercept                                                      default
#   student_t(3, 0, 2.5)        sd                                            0         default
#   student_t(3, 0, 2.5)        sd                     dauid                  0    (vectorized)
#   student_t(3, 0, 2.5)        sd           Intercept dauid                  0    (vectorized)
#   student_t(3, 0, 2.5)        sd                      pair                  0    (vectorized)
#   student_t(3, 0, 2.5)        sd           Intercept  pair                  0    (vectorized)
#   student_t(3, 0, 2.5)     sigma                                            0         default


## Now the fixed effects approach. There is a debate about cluster-correction
## for the pairs, I don't think it makes sense in this case The SE correction
## here is the HC1 correction.

fe1_social_capital <- feols(social.capital01 ~ vm.community.norm2 + da_prop_vm_20pct_06 | pair,
  data = wdat0,
  se = "hetero"
)
summary(fe1_social_capital)

## This next is very slow. Again, used for fragility checks. It is using the HC2 SE, which I thin is better

fe2_social_capital <- lm_robust(social.capital01 ~ vm.community.norm2 + da_prop_vm_20pct_06, fixed_effects = ~pair, data = wdat0)
summary(fe2_social_capital)

## Using ranks as alternative way to interpret the results

## The idea is to compare the higher perceiver to the lower perceiver ---
## ignoring the size of the difference in perceptions.

with(wdat0, table(perc_more, social.capital01, exclude = c()))
cohesion_rank_tab <- with(wdat0[wdat0$perc_more != .5, ], table(perc_more, social_capital01_rank))
cohesion_rank_tab
## More people were low on social cohesion who were the higher perceiver
prop.table(cohesion_rank_tab, margin = 1)
prop.table(cohesion_rank_tab, margin = 2)

### The rank-based models do not need random effects for pair because the pair
### mean is always the same (.5) --- there is no variation here.
lmer1_social_cohesion_rank <- lmer(social_capital01_rank ~ perc_more + da_prop_vm_20pct_06 + (1 | dauid), data = wdat0)
summary(lmer1_social_cohesion_rank)
## Regardless of optimizer same answer
# check_lmer1_social_cohesion_rank<-allFit(lmer1_social_cohesion_rank)

## Looking at the fully bayesian approach because of warnings about singular fits. In the end the results are the same.
blmer1_social_cohesion_rank <- brm(social_capital01_rank ~ perc_more + da_prop_vm_20pct_06 + (1 | dauid),
  data = wdat0, chains = 4, iter = 1000, cores = 4,
  control = list(adapt_delta = 0.99, max_treedepth = 20)
)
summary(blmer1_social_cohesion_rank)
## pp <- brms::pp_check(blmer1_social_cohesion_rank)
## print(pp)

get_prior(blmer1_social_cohesion_rank)
#                   prior     class                coef group resp dpar nlpar lb ub       source
#                  (flat)         b                                                      default
#                  (flat)         b da_prop_vm_20pct_06                             (vectorized)
#                  (flat)         b           perc_more                             (vectorized)
#  student_t(3, 0.5, 2.5) Intercept                                                      default
#    student_t(3, 0, 2.5)        sd                                            0         default
#    student_t(3, 0, 2.5)        sd                     dauid                  0    (vectorized)
#    student_t(3, 0, 2.5)        sd           Intercept dauid                  0    (vectorized)
#    student_t(3, 0, 2.5)     sigma                                            0         default

default_prior(blmer1_social_cohesion_rank)
#                   prior     class                coef group resp dpar nlpar lb ub       source
#                  (flat)         b                                                      default
#                  (flat)         b da_prop_vm_20pct_06                             (vectorized)
#                  (flat)         b           perc_more                             (vectorized)
#  student_t(3, 0.5, 2.5) Intercept                                                      default
#    student_t(3, 0, 2.5)        sd                                            0         default
#    student_t(3, 0, 2.5)        sd                     dauid                  0    (vectorized)
#    student_t(3, 0, 2.5)        sd           Intercept dauid                  0    (vectorized)
#    student_t(3, 0, 2.5)     sigma                                            0         default
####################
### Now for the community efficacy

with(wdat0, table(perc_more, community.resp01, exclude = c()))
community_rank_tab <- with(wdat0[wdat0$perc_more != .5, ], table(perc_more, community_resp01_rank))
community_rank_tab
## More people were low on social community who were the higher perceiver
prop.table(community_rank_tab, margin = 1)
prop.table(community_rank_tab, margin = 2)

lmer1_community_efficacy <- lmer(community.resp01 ~ vm.community.norm2 + da_prop_vm_20pct_06 + (1 | pair) + (1 | dauid), data = wdat0)
summary(lmer1_community_efficacy)

blmer1_community_efficacy <- brm(community.resp01 ~ vm.community.norm2 + da_prop_vm_20pct_06 + (1 | pair) + (1 | dauid),
  data = wdat0,
  chains = 4, iter = 2000, cores = 4,
  #  backend = "cmdstanr", threads = threading(2),
  control = list(adapt_delta = 0.99, max_treedepth = 20)
)
summary(blmer1_community_efficacy)
default_prior(blmer1_community_efficacy)

fe1_community_efficacy <- feols(community.resp01 ~ vm.community.norm2 | pair,
  data = wdat0, se = "hetero"
)
summary(fe1_community_efficacy)

fe2_community_efficacy <- lm_robust(community.resp01 ~ vm.community.norm2 + da_prop_vm_20pct_06,
  fixed_effects = ~pair, data = wdat0
)
summary(fe2_community_efficacy)

### The rank-based models do not need random effects for pair because the pair
### mean is always the same (.5) --- there is no variation here.
lmer1_community_efficacy_rank <- lmer(community_resp01_rank ~ perc_more + da_prop_vm_20pct_06 + (1 | dauid), data = wdat0)
summary(lmer1_community_efficacy_rank)
## Regardless of optimizer same answer
# check_lmer1_community_efficacy_rank<-allFit(lmer1_community_efficacy_rank)

blmer1_community_efficacy_rank <- brm(community_resp01_rank ~ perc_more + da_prop_vm_20pct_06 + (1 | dauid),
  data = wdat0, chains = 4, iter = 1000, cores = 4,
  control = list(adapt_delta = 0.99, max_treedepth = 20)
)
summary(blmer1_community_efficacy_rank)
## pp <- brms::pp_check(blmer1_community_efficacy_rank)
## print(pp)

## Create the appendix table body

## sc_profile_mlm <- lme4:::profile.merMod(lmer1_social_cohesion,which="vm.community.norm2",delta.cutoff=1/20)
## sc_prof_dt <- as.data.frame(sc_profile_mlm)
## sc_prof_dt %>% filter(abs(.zeta) < 2.00 & abs(.zeta) > 1.9)

social_cohesion_mlm_rank_res <- c(summary(lmer1_social_cohesion_rank)$coef[
  "perc_more",
  c("Estimate", "Std. Error")
], confint(lmer1_social_cohesion_rank, parm = "perc_more"))

social_cohesion_mlm_res <- c(summary(lmer1_social_cohesion)$coef[
  "vm.community.norm2",
  c("Estimate", "Std. Error")
], confint(lmer1_social_cohesion, parm = "vm.community.norm2"))

social_cohesion_fe_res <- tidy(fe2_social_capital) %>%
  filter(term == "vm.community.norm2") %>%
  select(estimate, std.error, conf.low, conf.high) %>%
  unlist()

social_cohesion_bmlm_res <- fixef(blmer1_social_cohesion)["vm.community.norm2", ]
social_cohesion_bmlm_rank_res <- fixef(blmer1_social_cohesion_rank)["perc_more", ]

community_efficacy_mlm_res <- c(summary(lmer1_community_efficacy)$coef[
  "vm.community.norm2",
  c("Estimate", "Std. Error")
], confint(lmer1_community_efficacy, parm = "vm.community.norm2"))

community_efficacy_mlm_rank_res <- c(summary(lmer1_community_efficacy_rank)$coef[
  "perc_more",
  c("Estimate", "Std. Error")
], confint(lmer1_community_efficacy_rank, parm = "perc_more"))

community_efficacy_fe_res <- tidy(fe2_community_efficacy) %>%
  filter(term == "vm.community.norm2") %>%
  select(estimate, std.error, conf.low, conf.high) %>%
  unlist()

community_efficacy_bmlm_res <- fixef(blmer1_community_efficacy)["vm.community.norm2", ]
community_efficacy_bmlm_rank_res <- fixef(blmer1_community_efficacy_rank)["perc_more", ]

appendix_tab <- cbind(
  "Social Cohesion MLM" = social_cohesion_mlm_res,
  "Social Cohesion Bayes MLM" = social_cohesion_bmlm_res,
  "Social Cohesion FE" = social_cohesion_fe_res,
  "Collective Efficacy MLM" = community_efficacy_mlm_res,
  "Collective Efficacy Bayes MLM" = community_efficacy_bmlm_res,
  "Collective Efficacy FE" = community_efficacy_fe_res
)
row.names(appendix_tab)[3:4] <- c("ci1", "ci2")

appendix_tab
#            Social Cohesion MLM Social Cohesion Bayes MLM
# Estimate           -0.06553736               -0.06586773
# Std. Error          0.01053103                0.01040184
# ci1                -0.08617576               -0.08650562
# ci2                -0.04490193               -0.04595609
#            Social Cohesion FE Collective Efficacy MLM
# Estimate          -0.06603539             -0.05057089
# Std. Error         0.01658262              0.01397528
# ci1               -0.09855762             -0.07797394
# ci2               -0.03351317             -0.02318029
#            Collective Efficacy Bayes MLM
# Estimate                     -0.05026532
# Std. Error                    0.01410360
# ci1                          -0.07770287
# ci2                          -0.02226625
#            Collective Efficacy FE
# Estimate             -0.033269100
# Std. Error            0.021422384
# ci1                  -0.075283193
# ci2                   0.008744993


res <- rbind(
  lmer1_social_cohesion = social_cohesion_mlm_res,
  lmer1_community_efficacy = community_efficacy_mlm_res
)
colnames(res)[3:4] <- c("ci1", "ci2")
row.names(res) <- c(
  "Social Cohesion",
  "Collective Efficacy"
)

res_rank <- rbind(
  lmer1_social_cohesion_rank_mlm = social_cohesion_mlm_rank_res,
  lmer1_social_cohesion_rank_bmlm = social_cohesion_bmlm_rank_res,
  lmer1_community_efficacy_rank_mlm = community_efficacy_mlm_rank_res,
  lmer1_community_efficacy_rank_bmlm = community_efficacy_bmlm_rank_res
)
colnames(res_rank)[3:4] <- c("ci1", "ci2")
row.names(res_rank) <- c(
  "Social Cohesion (lmer)",
  "Social Cohesion (brm)",
  "Collective Efficacy (lmer)",
  "Collective Efficacy (brm)"
)

res_rank
#                               Estimate Std. Error
# Social Cohesion (lmer)     -0.05249205 0.01470844
# Social Cohesion (brm)      -0.05212955 0.01466473
# Collective Efficacy (lmer) -0.02969247 0.01453784
# Collective Efficacy (brm)  -0.02962762 0.01394223
#                                    ci1          ci2
# Social Cohesion (lmer)     -0.08131592 -0.023668171
# Social Cohesion (brm)      -0.08077841 -0.023539692
# Collective Efficacy (lmer) -0.05818203 -0.001202913
# Collective Efficacy (brm)  -0.05650843 -0.003190642

## Record number of pairs, number of DAs.
stopifnot(unique(table(wdat0$pair)) == 2)

num_pairs <- sum(!is.na(wdat0$pair)) / 2
num_das <- length(unique(wdat0$dauid[!is.na(wdat0$pair)]))

num_pairs
# [1] 1886

num_das
# [1] 3098

table(!is.na(wdat0$pair))
#
# TRUE
# 3772
table(wdat0$perc_more)
#
#    0    1
# 1886 1886

## Make data at the level of the pair to illustrate the pairwise differences in later tables or figures
paired_data <- wdat0 %>%
  group_by(pair) %>%
  summarize(
    social_cohesion_diff = social.capital01[perc_more == 1] - social.capital01[perc_more == 0],
    community_efficacy_diff = community.resp01[perc_more == 1] - community.resp01[perc_more == 0],
    perceptions_diff = vm.community.norm2[perc_more == 1] - vm.community.norm2[perc_more == 0]
  ) %>%
  ungroup() %>%
  na.omit()

## In how many pairs is the higher perceiver the person with person with the
## lower social cohesion score?
table(paired_data$social_cohesion_diff < 0)
#
# FALSE  TRUE
#  1065   821
table(paired_data$social_cohesion_diff < 0) / nrow(paired_data)
#
#     FALSE      TRUE
# 0.5646872 0.4353128
## In how many pairs is the higher perceiver the person with person with the lower community efficacy score?
table(paired_data$community_efficacy_diff < 0)
#
# FALSE  TRUE
#  1106   780
table(paired_data$community_efficacy_diff < 0) / nrow(paired_data)
#
#     FALSE      TRUE
# 0.5864263 0.4135737

save(res, res_rank, num_pairs, num_das, appendix_tab, paired_data,
  blmer1_social_cohesion_rank, blmer1_community_efficacy_rank,
  file = here(
    "Analysis",
    "analysis_anyDA_new.rda"
  )
)
